
# coding: utf-8

# In[1]:

import os

# Directory holding the yearly bulk files; the output CSV is written here too.
DATA_DIR = "D:\\USPTO\\BULK\\1976-2001"

# One input file per year, '1976.dat' through '2001.dat'.
files = ['%d.dat' % year for year in range(1976, 2002)]

# Tags that open a new section of a record.
operators = ['PATN', 'INVT', 'ASSG', 'CLAS', 'UREF', 'FREF', 'LREP', 'DCLM',
             'OREF', 'ABST', 'PRIR']
# Same tags as a set: membership is tested once per input line.
_OPERATOR_SET = frozenset(operators)


def iter_class_records(lines):
    """Yield one ``"<WKU>*<ICL>\\n"`` row per CLAS section found in *lines*.

    *lines* is any iterable of text lines (an open file works).  Each line is
    stripped and split on its first space into a tag and an optional value:

    * a tag listed in ``operators`` starts a new section and discards the
      fields collected for the previous one;
    * a ``WKU`` line sets the current WKU.  A 9-character value loses its
      last character (presumably a check digit -- confirm against the data
      format specification).  The WKU is cleared at every ``PATN`` section;
    * any other ``tag value`` line is stored as a field of the current
      section.  A repeated tag overwrites the earlier value, so only the
      last ``ICL`` of a CLAS section is reported;
    * lines without a value (including blank lines) are ignored.

    A row is produced when a CLAS section ends, and only if the current WKU
    starts with ``'0'``; that leading ``'0'`` is dropped from the output.  A
    CLAS section with no ``ICL`` field yields an empty ICL part.
    """
    last_section = ''
    fields = {}
    wku = ''

    def finished_clas_rows():
        # Rows for the section being closed: at most one, and only for CLAS.
        if last_section == 'CLAS' and wku.startswith('0'):
            yield wku[1:] + "*" + fields.get('ICL', '') + '\n'

    for raw_line in lines:
        token = raw_line.strip().split(' ', 1)
        tag = token[0]
        if tag in _OPERATOR_SET:
            # A section header closes the previous section.
            yield from finished_clas_rows()
            if tag == 'PATN':
                wku = ''
            last_section = tag
            fields = {}
        elif len(token) == 2:
            value = token[1].strip()
            if tag == 'WKU':
                wku = value[:-1] if len(value) == 9 else value
            else:
                fields[tag] = value

    # The input may end while still inside a CLAS section; without this
    # final flush that last record would be silently dropped.
    yield from finished_clas_rows()


def main():
    """Convert every yearly ``.dat`` file into rows of ``1976_2001.csv``.

    Changes the working directory to ``DATA_DIR``, reads each file in
    ``files`` as ISO-8859-1 and writes the extracted rows as UTF-8.  Parser
    state is reset for every input file.
    """
    os.chdir(DATA_DIR)
    # ``with`` guarantees both handles are closed even if parsing fails.
    with open('1976_2001.csv', 'w', encoding='utf-8') as fout:
        for name in files:
            print(name)
            with open(name, 'r', encoding='ISO-8859-1') as fin:
                fout.writelines(iter_class_records(fin))
    print("DONE")


if __name__ == "__main__":
    main()
                


